# load required libraries
library("lme4")
library("ggplot2")
library("here")
library("tidyverse")
library("emmeans")
library("brms")
library("DHARMa")
library("glmmTMB")
library("performance")
library("scales")

1 Exploration and visualisation

A pseudonymized/codified dataset was used as a precaution due to ethical considerations (patient-level clinical data). The full dataset is available upon request (see manuscript data availability statement).

# load the data
df <- read_csv(here("./data/evSCAs_codified.csv"), show_col_types = FALSE) %>%
  rename(Experiment = `Experiment (= Sample collection Number)`) %>% 
  rename(HBB.genotype = `HBB Genotype`) %>% 
  fill(c(ID, Experiment, Age, Gender, Village, Ethnicity, Symptomatic), .direction = "down") %>% 
  # mutate(across(c(ID, Experiment, Sample, HBB.genotype, Gender, Village, Ethnicity, Symptomatic), as.factor)) %>% 
  mutate(across(c(`Date of IFA staining`, `Date of IFA counting`), as.Date)) %>%
  select(Sample, Total_parasite_counts, Gametocyte_counts, HBB.genotype, varATS_par_µL_D0,
         Age, Gender, Village, Ethnicity, Symptomatic) %>%
  mutate(HBB.genotype = case_when(
    HBB.genotype == 0 ~ "HbAA",
    HBB.genotype == 1 ~ "HbAC",
    HBB.genotype == 2 ~ "HbAS",
  )) %>%
  mutate(across(c(Sample, HBB.genotype, Gender, Village, Ethnicity, Symptomatic), as.factor)) %>% 
  mutate(HBB.genotype = fct_relevel(HBB.genotype, "HbAA")) # set HbAA as baseline reference
str(df)
## tibble [96 × 10] (S3: tbl_df/tbl/data.frame)
##  $ Sample               : Factor w/ 32 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Total_parasite_counts: num [1:96] 1023 1034 1033 1030 1038 ...
##  $ Gametocyte_counts    : num [1:96] 40 33 45 49 31 44 17 22 10 54 ...
##  $ HBB.genotype         : Factor w/ 3 levels "HbAA","HbAC",..: 1 1 1 2 2 2 1 1 1 3 ...
##  $ varATS_par_µL_D0     : num [1:96] 6490 6490 6490 3350 3350 ...
##  $ Age                  : num [1:96] 6 6 6 9.6 9.6 9.6 7.4 7.4 7.4 7.3 ...
##  $ Gender               : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 2 2 2 ...
##  $ Village              : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ethnicity            : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Symptomatic          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 1 1 1 ...
summary(df)
##      Sample   Total_parasite_counts Gametocyte_counts HBB.genotype
##  1      : 3   Min.   :1000          Min.   : 10.00    HbAA:42     
##  2      : 3   1st Qu.:1009          1st Qu.: 27.75    HbAC:36     
##  3      : 3   Median :1020          Median : 44.00    HbAS:18     
##  4      : 3   Mean   :1029          Mean   : 70.58                
##  5      : 3   3rd Qu.:1039          3rd Qu.: 63.50                
##  6      : 3   Max.   :1164          Max.   :610.00                
##  (Other):78                                                       
##  varATS_par_µL_D0      Age         Gender Village Ethnicity Symptomatic
##  Min.   :   54    Min.   : 4.900   0:48   1:18    1:78      0:39       
##  1st Qu.: 1508    1st Qu.: 6.375   1:48   2:27    2:18      1:57       
##  Median : 4564    Median : 7.650          3:45                         
##  Mean   :10615    Mean   : 8.409          4: 6                         
##  3rd Qu.: 7089    3rd Qu.: 8.150                                       
##  Max.   :87894    Max.   :35.800                                       
## 

Box plots and scatter plots of mean conversion rate (averaged across replicates) versus the different categorical/continuous variables.

df_grouped <- df %>% 
  group_by(Sample) %>% 
  mutate(mean_prop = mean(Gametocyte_counts/Total_parasite_counts)) %>% 
  distinct(Sample, .keep_all = TRUE)

boxplot(df_grouped$mean_prop ~ df_grouped$HBB.genotype)

boxplot(df_grouped$mean_prop ~ df_grouped$Village)

boxplot(df_grouped$mean_prop ~ df_grouped$Symptomatic)

boxplot(df_grouped$mean_prop ~ df_grouped$Ethnicity)

boxplot(df_grouped$mean_prop ~ df_grouped$Gender)

plot(df_grouped$mean_prop ~ df_grouped$Age)

ggplot(df_grouped, 
       aes(x = HBB.genotype, y = Gametocyte_counts/Total_parasite_counts,color = HBB.genotype)) +
  geom_boxplot() +
  geom_jitter(width = 0.1)

ggplot(df_grouped, 
       aes(fill = HBB.genotype, x = Gametocyte_counts/Total_parasite_counts)) +
  geom_density(alpha=0.4) +
  geom_vline(data=df_grouped %>% group_by(HBB.genotype) %>% summarise(genotype.median = median(Gametocyte_counts/Total_parasite_counts)), aes(xintercept=genotype.median, color=HBB.genotype),
             linetype="dashed")

More detailed plots of conversion rate:

ggplot(df, aes(x = varATS_par_µL_D0, y = Gametocyte_counts/Total_parasite_counts, color = HBB.genotype)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.4) 

ggplot(df, aes(x = log(varATS_par_µL_D0),
               y = log(Gametocyte_counts / Total_parasite_counts),
               color = HBB.genotype,
               fill = HBB.genotype,
               group = Sample)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ HBB.genotype) +
  labs(title = "log(varATS) versus log(SCR) (all technical replicates)")

ggplot(df, aes(x = log(varATS_par_µL_D0), 
               y = log(Gametocyte_counts / Total_parasite_counts), 
               color = HBB.genotype, 
               fill = HBB.genotype,
               group = Sample)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ Village) +
  labs(title = "log(varATS) versus SCR (all technical replicates) - facet by village")

ggplot(df, aes(x = log(varATS_par_µL_D0), 
               y = log(Gametocyte_counts / Total_parasite_counts), 
               color = HBB.genotype, 
               fill = HBB.genotype,
               group = Sample)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ HBB.genotype*Village) +
  labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by village*genotype")

ggplot(df, aes(x = log(varATS_par_µL_D0), 
               y = log(Gametocyte_counts / Total_parasite_counts), 
               color = HBB.genotype, 
               fill = HBB.genotype)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ Gender) +
  labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by gender")

ggplot(df, aes(x = log(varATS_par_µL_D0), 
               y = log(Gametocyte_counts / Total_parasite_counts), 
               color = HBB.genotype, 
               fill = HBB.genotype)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ Ethnicity) +
  labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by ethnicity")

ggplot(df, aes(x = log(varATS_par_µL_D0), 
               y = log(Gametocyte_counts / Total_parasite_counts), 
               color = HBB.genotype, 
               fill = HBB.genotype)) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
  stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
  facet_wrap(~ Symptomatic) +
  labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by symptomatic")

1.1 Conclusions based on visual exploration

Hb genotype shows a strong visual association with SCR.

The demographic factors are less pronounced, barring possibly village. However, there are only 2 samples in village 4, both of which have higher than average parasitemia. This effect is probably not relevant in our data, but might be caused by outlying samples. We likely do not have enough representative high parasitemia samples (across different villages) to make any conclusive statements about this association.

Looking at the SCR vs parasitemia plots (grouped by genotype), there does not appear to be a clear association between the two. However, the two highest parasitemia samples (all in HbAS) do show the highest SCR, which might underlies the interaction effect between parasitemia*genotype we observe in some of the models (see model comparison). Because of the high variation in SCR across parasitemia, the stratification of parasitemia by genotype and the fact that we do not have a well balanced representation across different parasitemia values, we opt to be careful here to avoid over-interpreting any significant parasitemia (or interaction) effects.

Because of the small sample size and unbalanced groups, and its effect on model assumptions for more complex models (see model comparison), we therefore focus mainly on a descriptive modelling approach with simple parsimonous models with the single parameter of interest (Hbb genotype), which is backed by the likelihood ratio tests shown in the model comparison section. That being said, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.

2 Final model

To keep things interpretable, and taking into account the fact that some variables are not covered by a lot of samples (e.g. sparse/uneven representation across parasitemia and village), plus the fact that most estimates remain consistent in directionality and magnitude and LRTs favour it, we opt for the most parsimonious model:

A generalized linear mixed model using the beta-binomial family with a random effect to account for the technical replicates and a per-genotype dispersion parameter.

\[SCR \sim HBB.genotype + (1 | Sample)\]

\[\phi \sim HBB.genotype\]

However, as discussed in the model comparison section, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.

For the Bayesian parameter estimates (which are less liberal than the standard Wald Z estimates), we had to resort to a more simple binomial model (which exhibits some overdispersion), because there were convergence issues with the more complex beta-binomial one.

2.1 Model fit

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##       840       858      -413       826        89 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1984   0.4454  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5764     0.1254 -28.510  < 2e-16 ***
## HBB.genotypeHbAC   0.5998     0.1832   3.273  0.00106 ** 
## HBB.genotypeHbAS   2.0997     0.2642   7.948 1.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.3205     0.6509  11.246  < 2e-16 ***
## HBB.genotypeHbAC  -0.4812     0.8574  -0.561    0.575    
## HBB.genotypeHbAS  -4.4575     0.8007  -5.567 2.59e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2 Model assumptions

All model assumptions are met:

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1782, p-value = 0.556
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

2.3 Estimates

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

emm <- emmeans(model, ~ HBB.genotype)
pairs(emm, adjust="bonferroni", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.82 0.334 Inf      1.17      2.82    1   3.273  0.0032
##  HbAS / HbAA       8.16 2.160 Inf      4.34     15.37    1   7.948  <.0001
##  HbAS / HbAC       4.48 1.200 Inf      2.36      8.52    1   5.593  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 3 tests 
## Tests are performed on the log odds ratio scale

The above table not only shows the estimated odds ratio and its corresponding p-value, but also the confidence interval of this odds ratio (= asymp.LCL and asymp.UCL).

These p-values are adjusted for multiple testing using the Bonferroni method.

The estimates are already transformed from the log-scale, so they can be directly interpreted as odds ratios (OR).

The estimated SCR per genotype is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.3.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a regular binomial generalized linear mixed model with the same random effect. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

This binomial model does not fulfil all model assumptions (degree of heteroskedasticity), but the estimates are in line with the beta-binomial model.

# fit a bayesian GLMM
brm_model <- brm(
  Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample),
  family = binomial,
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: binomial 
##   Links: mu = logit 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) 
##    Data: df (Number of observations: 96) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.63      0.09     0.48     0.83 1.00     2729     5612
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -3.58      0.17    -3.92    -3.24 1.00     1948     2832
## HBB.genotypeHbAC     0.60      0.25     0.08     1.10 1.00     1907     3186
## HBB.genotypeHbAS     1.93      0.31     1.32     2.53 1.00     2832     4622
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Model assumptions (fit via glmer in order to use DHARMa):

bm_model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + (1 | Sample), 
  family = binomial, data = df)
summary(bm_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1276.0    1286.2    -634.0    1268.0        92 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -12.2206  -0.9947   0.0491   0.7415  15.3930 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.3268   0.5717  
## Number of obs: 96, groups:  Sample, 32
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5841     0.1558 -23.002  < 2e-16 ***
## HBB.genotypeHbAC   0.5962     0.2284   2.610  0.00905 ** 
## HBB.genotypeHbAS   1.9299     0.2816   6.852 7.26e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HBB.HAC
## HBB.gntyHAC -0.682        
## HBB.gntyHAS -0.553  0.377
# Simulate residuals
sim_res <- simulateResiduals(bm_model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.7091, p-value = 0.13
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

Bayesian odds ratios estimates and credible intervals:

brm_emm <- emmeans(brm_model, ~ HBB.genotype)
pairs(brm_emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA       1.82     0.992      2.84
##  HbAS / HbAA       6.89     3.383     11.87
##  HbAS / HbAC       3.80     1.755      6.52
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

Bayesian SCR estimates:

confint(brm_emm, type="response")

3 Model comparison

Briefly:

  • Most models report similar SRC estimates/odds ratios.
  • The binomial model with genotype as the only independent variable meets most model assumptions, although the variance differences between the genotype groups might be better captured by a beta binomial model (Levene Test for homogeneity of variance is significant). This could indicate that there is a systematic dependency of the dispersion/variance on another variable in the model.
  • The beta binomial model with genotype-only and a per-genotype dispersion factor is similar, but does pass the variance homogeneity test.
  • Bayesian models do not converge for any of the beta-binomial models.
    • Bayesian estimates are preferred over the parametric (wald z) estimates/p-values because the latter are too liberal (confidence intervals too narrow/p-values too optimistic, since they depend on large sample size/asymptotic assumptions).
  • An alternative approach to fixing overdispersion/accounting for per-genotype dispersion differences would be to use an observation-level random effect in a binomial model, instead of a beta-binomial model. These models still show problematic model assumptions unfortunately.

On adding additional covariates:

Since we have such small sample (and group) sizes, we need to be careful in adding too many covariates to the model, and model assumptions still need to be met for these more complex models. On the other hand, we should also avoid missing any important biological factors or potential confounding factors.

LRTs of complex vs reduced model indicate that the simple models are OK to use. Since these focus on the main biological factor of interest, we will opt to use these and clarify in the text that we are unable to exclude the potential relevance of other factors.

Moreover, the direction and magnitude of our estimates remains similar across the different models.

  • Models that add parasitemia as a variable (+ optional interaction effect) all show problematic residual vs predicted deviation. The best options appears to be beta-binomial genotype*parasitemia, but DHARMA fitting results in warnings (step failure) and we likely do not have enough samples for every genotype*parasitemia level to make conclusive statements.
  • Models with interaction effects make it more difficult to report on the estimated per-group SCR and the odds ratio contrasts (because these effects will differ for different levels of parasitemia).
  • Adding village makes the interaction effect stronger, likely because there are so few values for village 4, which all have high parasitemias. But due to the small sample size per group, I believe we cannot rely on these model estimates.
    • Adding gender can omit the need for the interaction effect, by providing good model assumptions, even though both parasitemia and gender are not significant. Could this be caused by non-uniform groups of samples (or random noise in the data?) that receive an improved fit when taking into account gender/interactions/etc.?
    • the small number of samples for different levels of the additional variables could be a cause of concern.
    • we could consider modelling some of these variables as random effects too, e.g. to account for between-village differences (but the effect should be similar to adding it as a normal variable since there are only a few levels).

Decision:

  • use a simple genotype-only binomial model (allows Bayesian credible intervals) or
  • ~~use a beta-binomial model with genotype*parasitemia (interaction) (without Bayesian estimates): more difficult to interpret odds ratios and trends appear to remain similar
  • use a simple genotype-only beta-binomial model (without Bayesian estimates)

3.1 GLMM Binomial - genotype-only

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + (1 | Sample), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1276.0    1286.2    -634.0    1268.0        92 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -12.2206  -0.9947   0.0491   0.7415  15.3930 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.3268   0.5717  
## Number of obs: 96, groups:  Sample, 32
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5841     0.1558 -23.002  < 2e-16 ***
## HBB.genotypeHbAC   0.5962     0.2284   2.610  0.00905 ** 
## HBB.genotypeHbAS   1.9299     0.2816   6.852 7.26e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HBB.HAC
## HBB.gntyHAC -0.682        
## HBB.gntyHAS -0.553  0.377
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.7091, p-value = 0.13
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.82 0.415 Inf      1.06      3.10    1   2.610  0.0246
##  HbAS / HbAA       6.89 1.940 Inf      3.56     13.33    1   6.852  <.0001
##  HbAS / HbAC       3.80 1.090 Inf      1.93      7.45    1   4.631  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.2 GLMM Binomial genotype + parasitemia

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1276.1    1288.9    -633.1    1266.1        91 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -12.2497  -0.9812   0.0147   0.7501  15.3566 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.3066   0.5537  
## Number of obs: 96, groups:  Sample, 32
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -3.5962     0.1514 -23.758  < 2e-16 ***
## HBB.genotypeHbAC          0.6586     0.2260   2.914  0.00357 ** 
## HBB.genotypeHbAS          1.8723     0.2761   6.782 1.19e-11 ***
## scale(varATS_par_µL_D0)   0.1443     0.1046   1.379  0.16783    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
##             (Intr) HBB.HAC HBB.HAS
## HBB.gntyHAC -0.679                
## HBB.gntyHAS -0.537  0.335         
## s(ATS__µL_D -0.060  0.200  -0.151
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.64, p-value = 0.162
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.93 0.437 Inf      1.14      3.28    1   2.914  0.0100
##  HbAS / HbAA       6.50 1.800 Inf      3.41     12.42    1   6.782  <.0001
##  HbAS / HbAC       3.37 0.984 Inf      1.70      6.68    1   4.152  0.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.3 GLMM Binomial genotype * parasitemia

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1276.2    1294.1    -631.1    1262.2        89 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -12.2810  -0.9965   0.0406   0.7505  15.3173 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.2704   0.52    
## Number of obs: 96, groups:  Sample, 32
## 
## Fixed effects:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                               -3.5745     0.1429 -25.007  < 2e-16
## HBB.genotypeHbAC                           0.7976     0.2810   2.839  0.00453
## HBB.genotypeHbAS                           1.7840     0.2647   6.740 1.58e-11
## scale(varATS_par_µL_D0)                   -0.0957     0.1571  -0.609  0.54234
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)   0.7020     0.5658   1.241  0.21466
## HBB.genotypeHbAS:scale(varATS_par_µL_D0)   0.3788     0.2038   1.859  0.06309
##                                             
## (Intercept)                              ***
## HBB.genotypeHbAC                         ** 
## HBB.genotypeHbAS                         ***
## scale(varATS_par_µL_D0)                     
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)    
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
##             (Intr) HBB.HAC HBB.HAS s(ATS_ HBBHAC
## HBB.gntyHAC -0.509                              
## HBB.gntyHAS -0.540  0.275                       
## s(ATS__µL_D -0.095  0.048   0.051               
## HBBHAC:(ATS  0.026  0.629  -0.014  -0.278       
## HBBHAS:(ATS  0.073 -0.037  -0.191  -0.771  0.214
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.3675, p-value = 0.374
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       2.22 0.624 Inf      1.15      4.29    1   2.839  0.0126
##  HbAS / HbAA       5.95 1.580 Inf      3.20     11.07    1   6.740  <.0001
##  HbAS / HbAC       2.68 0.882 Inf      1.24      5.80    1   3.000  0.0076
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.4 GLMM betabinomial genotype-only

Still shows heteroskedasticity…

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + (1 | Sample),
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     909.8     922.6    -449.9     899.8        91 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.2459   0.4958  
## Number of obs: 96, groups:  Sample, 32
## 
## Dispersion parameter for betabinomial family ():  126 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.4901     0.1572 -22.208  < 2e-16 ***
## HBB.genotypeHbAC   0.5614     0.2230   2.517   0.0118 *  
## HBB.genotypeHbAS   1.8728     0.2635   7.108 1.18e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.7026, p-value = 0.1
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.75 0.391 Inf      1.04      2.96    1   2.517  0.0317
##  HbAS / HbAA       6.51 1.710 Inf      3.51     12.07    1   7.108  <.0001
##  HbAS / HbAC       3.71 0.985 Inf      1.99      6.92    1   4.939  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.5 GLMM betabinomial genotype-only - per-genotype dispersion

Adding per-genotype dispersion factor appears to improve model fit.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + (1 | Sample),
    # HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
    # HBB.genotype + (1 | Sample),
    # HBB.genotype + scale(varATS_par_µL_D0) + HBB.genotype*scale(varATS_par_µL_D0) + (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##       840       858      -413       826        89 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1984   0.4454  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5764     0.1254 -28.510  < 2e-16 ***
## HBB.genotypeHbAC   0.5998     0.1832   3.273  0.00106 ** 
## HBB.genotypeHbAS   2.0997     0.2642   7.948 1.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.3205     0.6509  11.246  < 2e-16 ***
## HBB.genotypeHbAC  -0.4812     0.8574  -0.561    0.575    
## HBB.genotypeHbAS  -4.4575     0.8007  -5.567 2.59e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1782, p-value = 0.556
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.82 0.334 Inf      1.19       2.8    1   3.273  0.0031
##  HbAS / HbAA       8.16 2.160 Inf      4.40      15.2    1   7.948  <.0001
##  HbAS / HbAC       4.48 1.200 Inf      2.39       8.4    1   5.593  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.6 GLMM betabinomial genotype + parasitemia - per-genotype dispersion

Adding parasitemia leads to strong quantile deviations in the residuals.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    # HBB.genotype (1 | Sample),
    HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
    # HBB.genotype + (1 | Sample),
    # HBB.genotype + scale(varATS_par_µL_D0) + HBB.genotype*scale(varATS_par_µL_D0) + (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     840.9     861.4    -412.5     824.9        88 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1959   0.4426  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -3.58461    0.12504 -28.667  < 2e-16 ***
## HBB.genotypeHbAC         0.64247    0.18665   3.442 0.000577 ***
## HBB.genotypeHbAS         2.04853    0.26547   7.716  1.2e-14 ***
## scale(varATS_par_µL_D0)  0.09986    0.09487   1.053 0.292505    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.2975     0.6469  11.281  < 2e-16 ***
## HBB.genotypeHbAC  -0.4574     0.8538  -0.536    0.592    
## HBB.genotypeHbAS  -4.3602     0.7887  -5.528 3.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2095, p-value = 0.512
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.90 0.355 Inf      1.23      2.94    1   3.442  0.0017
##  HbAS / HbAA       7.76 2.060 Inf      4.16     14.45    1   7.716  <.0001
##  HbAS / HbAC       4.08 1.140 Inf      2.12      7.86    1   5.026  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.7 GLMM betabinomial genotype * parasitemia - per-genotype dispersion

Adding an interaction effect appears to fix the pattern/dependencies in the residuals, but i) leads to some amount of heteroskedasticity and ii) results in fitting warnings (step failure).

# create scaled parasitemia for interpretation in emmeans
model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype * scaled_varATS_par_µL_D0 + (1 | Sample),
    dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df %>% mutate(scaled_varATS_par_µL_D0 = scale(varATS_par_µL_D0))
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype * scaled_varATS_par_µL_D0 + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df %>% mutate(scaled_varATS_par_µL_D0 = scale(varATS_par_µL_D0))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     840.6     866.3    -410.3     820.6        86 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1659   0.4074  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              -3.56662    0.11631 -30.665  < 2e-16
## HBB.genotypeHbAC                          0.79805    0.22593   3.532 0.000412
## HBB.genotypeHbAS                          1.96085    0.25804   7.599 2.98e-14
## scaled_varATS_par_µL_D0                  -0.08766    0.12853  -0.682 0.495211
## HBB.genotypeHbAC:scaled_varATS_par_µL_D0  0.68526    0.45401   1.509 0.131212
## HBB.genotypeHbAS:scaled_varATS_par_µL_D0  0.32055    0.17911   1.790 0.073516
##                                             
## (Intercept)                              ***
## HBB.genotypeHbAC                         ***
## HBB.genotypeHbAS                         ***
## scaled_varATS_par_µL_D0                     
## HBB.genotypeHbAC:scaled_varATS_par_µL_D0    
## HBB.genotypeHbAS:scaled_varATS_par_µL_D0 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.3117     0.6509  11.233  < 2e-16 ***
## HBB.genotypeHbAC  -0.4904     0.8547  -0.574    0.566    
## HBB.genotypeHbAS  -4.3815     0.7834  -5.593 2.23e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0895, p-value = 0.702
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

Since there is an interaction effect here, it is not trivial to just predict the effect/odds ratio for the different genotypes. We can however visualise how the association with parasitemia changes for these different genotype groups (i.e. the slope of the parasitemia effect):

# slope of parasitemia for different genotypes
emtrends(model, pairwise ~ HBB.genotype, var = "scaled_varATS_par_µL_D0")
## $emtrends
##  HBB.genotype scaled_varATS_par_µL_D0.trend    SE  df asymp.LCL asymp.UCL
##  HbAA                               -0.0877 0.129 Inf   -0.3396     0.164
##  HbAC                                0.5976 0.435 Inf   -0.2558     1.451
##  HbAS                                0.2329 0.125 Inf   -0.0115     0.477
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast    estimate    SE  df z.ratio p.value
##  HbAA - HbAC   -0.685 0.454 Inf  -1.509  0.2865
##  HbAA - HbAS   -0.321 0.179 Inf  -1.790  0.1730
##  HbAC - HbAS    0.365 0.453 Inf   0.805  0.6997
## 
## P value adjustment: tukey method for comparing a family of 3 estimates
# visualise slopes
emmip(model, HBB.genotype ~ scaled_varATS_par_µL_D0, cov.reduce = range)
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the emmeans package.
##   Please report the issue at <https://github.com/rvlenth/emmeans/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • linetype : "HBB.genotype"

It appears that the parasitemia association is slightly negative for HbAA and positive for the others. However, none of the slopes are significant. As we saw in the exploratory plots, parasitemia is quite variable and a few high/low samples are enough to create these slope lines.

When we make the contrasts between the genotype groups, we need to choose at which level of parasitemia to do so. The mean value seems to give higher effect sizes than in the genotype-only models (likely because there are a few high outlying parasitemia samples), but the median parasitemia comparison seems to be more in-line with previous models.

# emm <- emmeans(model, ~ HBB.genotype)
# pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))

# compare genotypes at average parasitemia
emm <- emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, cov.reduce=mean)
pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## scaled_varATS_par_µL_D0 = -2.62574566762518e-17:
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       2.22 0.502 Inf      1.31      3.77    1   3.532  0.0012
##  HbAS / HbAA       7.11 1.830 Inf      3.88     13.01    1   7.599  <.0001
##  HbAS / HbAC       3.20 0.963 Inf      1.58      6.48    1   3.862  0.0003
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
# compare genotypes at median parasitemia instead
emm <- emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, cov.reduce=median)
pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## scaled_varATS_par_µL_D0 = -0.324263549611677:
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.78 0.315 Inf      1.17      2.69    1   3.253  0.0033
##  HbAS / HbAA       6.40 1.780 Inf      3.34     12.26    1   6.698  <.0001
##  HbAS / HbAC       3.60 0.992 Inf      1.89      6.87    1   4.648  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
# compare at specific value using emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, at = list(scaled_varATS_par_µL_D0 = 40)), but needs to happen in scaled range

3.8 Bayesian estimates

None of the beta binomial models defined above can be fit using the Bayesian approach (“parts of the model have not converged” warning).

An alternative approach using a regular binomial model plus a per-observation random factor (which is an alternative approach to modelling overdispersion) does converge, but at the expense of homogeneity of variance (see next section).

brm_model <- brm(
  bf(
    Gametocyte_counts | trials(Total_parasite_counts) ~ 
      HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
    phi ~ HBB.genotype  # dispersion (precision) depends on genotype
  ), 
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 4,
  # control = list(adapt_delta = 0.95),
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 6612 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 3378 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 6.69, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 6612 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: beta_binomial 
##   Links: mu = logit; phi = log 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample) 
##          phi ~ HBB.genotype
##    Data: df (Number of observations: 96) 
##   Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 10000
## 
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.36      1.59     0.07     4.53 2.01        5        7
## 
## Regression Coefficients:
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                -1.06      7.74   -24.00     8.07 2.40        7
## phi_Intercept           130.21    122.08    15.71   436.19 2.11        6
## HBB.genotypeHbAC          7.02     17.81   -27.09    48.89 2.08        9
## HBB.genotypeHbAS         -7.40     21.25   -41.20    27.06 6.69        4
## scalevarATS_par_µL_D0     2.96      2.92    -0.13     7.15 2.29        5
## phi_HBB.genotypeHbAC    -38.29    144.47  -292.41   304.66 1.95       10
## phi_HBB.genotypeHbAS     73.33    198.89  -127.45   393.59 5.37        5
##                       Tail_ESS
## Intercept                   17
## phi_Intercept                4
## HBB.genotypeHbAC            21
## HBB.genotypeHbAS             6
## scalevarATS_par_µL_D0       25
## phi_HBB.genotypeHbAC         5
## phi_HBB.genotypeHbAS        NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
  bf(
    Gametocyte_counts | trials(Total_parasite_counts) ~ 
      HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample),
    phi ~ HBB.genotype  # dispersion (precision) depends on genotype
  ), 
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 4,
  # control = list(adapt_delta = 0.95),
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 2845 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 1097 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.71, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 2845 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: beta_binomial 
##   Links: mu = logit; phi = log 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample) 
##          phi ~ HBB.genotype
##    Data: df (Number of observations: 96) 
##   Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 10000
## 
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.56      0.18     0.35     0.87 1.21       14       27
## 
## Regression Coefficients:
##                                        Estimate Est.Error l-95% CI u-95% CI
## Intercept                                 -3.47      1.60    -3.88    -3.23
## phi_Intercept                             22.18     23.34     6.43    77.99
## HBB.genotypeHbAC                           0.10      9.84   -26.27    15.71
## HBB.genotypeHbAS                           1.72      1.65     1.18     2.60
## scalevarATS_par_µL_D0                      0.00      0.84    -0.39     1.77
## HBB.genotypeHbAC:scalevarATS_par_µL_D0    -6.57     25.48   -75.75    14.31
## HBB.genotypeHbAS:scalevarATS_par_µL_D0     0.26      0.87    -1.50     0.73
## phi_HBB.genotypeHbAC                      19.30     89.23   -37.66   304.80
## phi_HBB.genotypeHbAS                     -18.40     22.54   -74.66    -3.27
##                                        Rhat Bulk_ESS Tail_ESS
## Intercept                              1.27       13       91
## phi_Intercept                          1.44        8       17
## HBB.genotypeHbAC                       1.57       12       11
## HBB.genotypeHbAS                       1.35       11        6
## scalevarATS_par_µL_D0                  1.10       25       26
## HBB.genotypeHbAC:scalevarATS_par_µL_D0 2.45       17       12
## HBB.genotypeHbAS:scalevarATS_par_µL_D0 1.15       18       34
## phi_HBB.genotypeHbAC                   2.05        5       16
## phi_HBB.genotypeHbAS                   1.42        8       26
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
  bf(
    Gametocyte_counts | trials(Total_parasite_counts) ~ 
      HBB.genotype + (1 | Sample),
    phi ~ HBB.genotype  # dispersion (precision) depends on genotype
  ), 
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 4,
  # control = list(adapt_delta = 0.95),
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 7274 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 2726 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.34, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 7274 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: beta_binomial 
##   Links: mu = logit; phi = log 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) 
##          phi ~ HBB.genotype
##    Data: df (Number of observations: 96) 
##   Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 10000
## 
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.13      2.06     0.18     6.62 1.83        9       48
## 
## Regression Coefficients:
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                4.36     13.42   -23.57    31.02 1.37        9
## phi_Intercept          171.54    120.05    57.08   540.69 1.49        8
## HBB.genotypeHbAC        -3.32     21.02   -53.13    47.35 1.36       12
## HBB.genotypeHbAS       -17.80     41.77  -142.23    37.90 1.43       11
## phi_HBB.genotypeHbAC   -70.30    118.10  -354.83   156.40 1.21       17
## phi_HBB.genotypeHbAS    38.60    123.85  -237.69   353.30 1.51       36
##                      Tail_ESS
## Intercept                  34
## phi_Intercept               4
## HBB.genotypeHbAC           29
## HBB.genotypeHbAS           14
## phi_HBB.genotypeHbAC      114
## phi_HBB.genotypeHbAS       52
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
  bf(
    Gametocyte_counts | trials(Total_parasite_counts) ~ 
      HBB.genotype + (1 | Sample),
    phi ~ HBB.genotype  # dispersion (precision) depends on genotype
  ), 
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 4,
  # control = list(adapt_delta = 0.95),
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 7274 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 2726 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.34, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 7274 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: beta_binomial 
##   Links: mu = logit; phi = log 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) 
##          phi ~ HBB.genotype
##    Data: df (Number of observations: 96) 
##   Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 10000
## 
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.13      2.06     0.18     6.62 1.83        9       48
## 
## Regression Coefficients:
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                4.36     13.42   -23.57    31.02 1.37        9
## phi_Intercept          171.54    120.05    57.08   540.69 1.49        8
## HBB.genotypeHbAC        -3.32     21.02   -53.13    47.35 1.36       12
## HBB.genotypeHbAS       -17.80     41.77  -142.23    37.90 1.43       11
## phi_HBB.genotypeHbAC   -70.30    118.10  -354.83   156.40 1.21       17
## phi_HBB.genotypeHbAS    38.60    123.85  -237.69   353.30 1.51       36
##                      Tail_ESS
## Intercept                  34
## phi_Intercept               4
## HBB.genotypeHbAC           29
## HBB.genotypeHbAS           14
## phi_HBB.genotypeHbAC      114
## phi_HBB.genotypeHbAS       52
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

3.9 Observation-level random effects

Since the betabinomial models do not converge when using the Bayesian fitting method, we could use an observation-level random effects (OLRE) to account for overdispersion instead:

df$obs <- seq_len(nrow(df))

brm_model <- brm(
  Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) + (1 | obs),
  family = binomial,
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: binomial 
##   Links: mu = logit 
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) + (1 | obs) 
##    Data: df (Number of observations: 96) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Multilevel Hyperparameters:
## ~obs (Number of levels: 96) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.28      0.03     0.22     0.34 1.00     8732    13649
## 
## ~Sample (Number of levels: 32) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.61      0.09     0.46     0.82 1.00     7581    11682
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -3.59      0.17    -3.94    -3.25 1.00     6059     9312
## HBB.genotypeHbAC     0.60      0.25     0.08     1.10 1.00     6595     8893
## HBB.genotypeHbAS     1.92      0.31     1.30     2.54 1.00     7402     9327
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Assessing model fit for the ORLE requires the model to be fit using lme4 instead of brms.

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + (1 | Sample) + (1 | obs), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample) + (1 | obs)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     852.3     865.1    -421.1     842.3        91 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.26424 -0.24057 -0.01068  0.19490  0.91999 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  obs    (Intercept) 0.07229  0.2689  
##  Sample (Intercept) 0.30183  0.5494  
## Number of obs: 96, groups:  obs, 96; Sample, 32
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5994     0.1556 -23.126  < 2e-16 ***
## HBB.genotypeHbAC   0.6004     0.2281   2.632   0.0085 ** 
## HBB.genotypeHbAS   1.9233     0.2813   6.837 8.08e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HBB.HAC
## HBB.gntyHAC -0.682        
## HBB.gntyHAS -0.553  0.377
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.6311, p-value = 0.162
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.82 0.416 Inf      1.07      3.11    1   2.632  0.0231
##  HbAS / HbAA       6.84 1.930 Inf      3.54     13.23    1   6.837  <.0001
##  HbAS / HbAC       3.75 1.080 Inf      1.91      7.37    1   4.599  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

Unfortunately, the OLRE doesn’t seem to fix the overdispersion/per-genotype dispersion differences entirely (i.e. betabinomial with per-genotype dispersion is still preferred).

Even adding parasitemia to the OLRE, the problems remain.

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample) +          (1 | obs)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     852.6     868.0    -420.3     840.6        90 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.26339 -0.23481 -0.00905  0.19339  0.88925 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  obs    (Intercept) 0.07227  0.2688  
##  Sample (Intercept) 0.28309  0.5321  
## Number of obs: 96, groups:  obs, 96; Sample, 32
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -3.6110     0.1515 -23.828  < 2e-16 ***
## HBB.genotypeHbAC          0.6604     0.2263   2.919  0.00351 ** 
## HBB.genotypeHbAS          1.8680     0.2764   6.758  1.4e-11 ***
## scale(varATS_par_µL_D0)   0.1387     0.1047   1.324  0.18535    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
##             (Intr) HBB.HAC HBB.HAS
## HBB.gntyHAC -0.679                
## HBB.gntyHAS -0.537  0.336         
## s(ATS__µL_D -0.060  0.200  -0.151
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.5688, p-value = 0.192
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.94 0.438 Inf      1.14      3.29    1   2.919  0.0098
##  HbAS / HbAA       6.48 1.790 Inf      3.39     12.38    1   6.758  <.0001
##  HbAS / HbAC       3.35 0.979 Inf      1.68      6.64    1   4.127  0.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample) +          (1 | obs)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     852.8     873.3    -418.4     836.8        88 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.27715 -0.24668 -0.01384  0.18366  0.85689 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  obs    (Intercept) 0.07219  0.2687  
##  Sample (Intercept) 0.24851  0.4985  
## Number of obs: 96, groups:  obs, 96; Sample, 32
## 
## Fixed effects:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              -3.58993    0.14352 -25.014  < 2e-16
## HBB.genotypeHbAC                          0.80315    0.28208   2.847  0.00441
## HBB.genotypeHbAS                          1.78272    0.26576   6.708 1.97e-11
## scale(varATS_par_µL_D0)                  -0.09486    0.15767  -0.602  0.54741
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)  0.70494    0.56801   1.241  0.21458
## HBB.genotypeHbAS:scale(varATS_par_µL_D0)  0.36739    0.20462   1.796  0.07257
##                                             
## (Intercept)                              ***
## HBB.genotypeHbAC                         ** 
## HBB.genotypeHbAS                         ***
## scale(varATS_par_µL_D0)                     
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)    
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
##             (Intr) HBB.HAC HBB.HAS s(ATS_ HBBHAC
## HBB.gntyHAC -0.509                              
## HBB.gntyHAS -0.540  0.275                       
## s(ATS__µL_D -0.095  0.048   0.051               
## HBBHAC:(ATS  0.026  0.629  -0.014  -0.278       
## HBBHAS:(ATS  0.073 -0.037  -0.192  -0.771  0.214
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.3323, p-value = 0.394
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       2.23 0.630 Inf      1.15      4.32    1   2.847  0.0123
##  HbAS / HbAA       5.95 1.580 Inf      3.19     11.08    1   6.708  <.0001
##  HbAS / HbAC       2.66 0.879 Inf      1.23      5.77    1   2.967  0.0085
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

3.10 Adding additional variables

Adding too many additional variables could be problematic due to small sample/group sizes.

Model assumptions remain problematic without adding the interaction effect.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + 
    scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + scale(Age) + Gender +  
##         Village + Ethnicity + Symptomatic + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     851.4     889.9    -410.7     821.4        81 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1774   0.4212  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -3.53832    0.28495 -12.417  < 2e-16 ***
## HBB.genotypeHbAC         0.60207    0.20377   2.955  0.00313 ** 
## HBB.genotypeHbAS         1.91313    0.27670   6.914 4.71e-12 ***
## scale(varATS_par_µL_D0)  0.16761    0.11268   1.487  0.13691    
## scale(Age)              -0.01076    0.08702  -0.124  0.90162    
## Gender1                 -0.11956    0.19471  -0.614  0.53916    
## Village2                 0.01520    0.26069   0.058  0.95352    
## Village3                 0.15883    0.26569   0.598  0.54998    
## Village4                 0.49318    0.42390   1.163  0.24465    
## Ethnicity2               0.03488    0.27032   0.129  0.89734    
## Symptomatic1            -0.10713    0.21216  -0.505  0.61360    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.2872     0.6453  11.292  < 2e-16 ***
## HBB.genotypeHbAC  -0.4302     0.8535  -0.504    0.614    
## HBB.genotypeHbAS  -4.2988     0.7853  -5.474 4.39e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2155, p-value = 0.472
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.83 0.372 Inf      1.13      2.94    1   2.955  0.0088
##  HbAS / HbAA       6.77 1.870 Inf      3.54     12.96    1   6.914  <.0001
##  HbAS / HbAC       3.71 1.090 Inf      1.87      7.37    1   4.476  <.0001
## 
## Results are averaged over the levels of: Gender, Village, Ethnicity, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

Variables do not seem to be overly correlated though.

performance::check_collinearity(model)

The model assumptions improve when we add the parasitemia*genotype interaction effect.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype * scale(varATS_par_µL_D0) + 
    scale(Age) + Gender + Village + Ethnicity + Symptomatic +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype * scale(varATS_par_µL_D0) + scale(Age) + Gender +  
##         Village + Ethnicity + Symptomatic + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     846.1     889.7    -406.0     812.1        79 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1139   0.3375  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                                           Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              -3.494115   0.254713 -13.718  < 2e-16
## HBB.genotypeHbAC                          0.817530   0.211227   3.870 0.000109
## HBB.genotypeHbAS                          1.764612   0.254655   6.929 4.23e-12
## scale(varATS_par_µL_D0)                  -0.106636   0.133963  -0.796 0.426026
## scale(Age)                                0.004667   0.071984   0.065 0.948311
## Gender1                                  -0.124449   0.165801  -0.751 0.452898
## Village2                                 -0.073977   0.245693  -0.301 0.763341
## Village3                                  0.130038   0.225411   0.577 0.564011
## Village4                                  0.522427   0.397570   1.314 0.188829
## Ethnicity2                                0.377407   0.244998   1.540 0.123451
## Symptomatic1                             -0.098498   0.177813  -0.554 0.579617
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)  1.258240   0.470477   2.674 0.007487
## HBB.genotypeHbAS:scale(varATS_par_µL_D0)  0.423132   0.174221   2.429 0.015153
##                                             
## (Intercept)                              ***
## HBB.genotypeHbAC                         ***
## HBB.genotypeHbAS                         ***
## scale(varATS_par_µL_D0)                     
## scale(Age)                                  
## Gender1                                     
## Village2                                    
## Village3                                    
## Village4                                    
## Ethnicity2                                  
## Symptomatic1                                
## HBB.genotypeHbAC:scale(varATS_par_µL_D0) ** 
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.2953     0.6510  11.206  < 2e-16 ***
## HBB.genotypeHbAC  -0.4312     0.8560  -0.504    0.614    
## HBB.genotypeHbAS  -4.4712     0.7904  -5.657 1.54e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0441, p-value = 0.84
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       2.26 0.478 Inf      1.38      3.72    1   3.870  0.0003
##  HbAS / HbAA       5.84 1.490 Inf      3.21     10.61    1   6.929  <.0001
##  HbAS / HbAC       2.58 0.756 Inf      1.30      5.12    1   3.232  0.0035
## 
## Results are averaged over the levels of: Gender, Village, Ethnicity, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)

But even just adding gender and parasitemia, without the interaction effect, also seems to work, even though gender itself is once again not significant.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + scale(varATS_par_µL_D0) + Gender +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + Gender + (1 | Sample)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     842.4     865.5    -412.2     824.4        87 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Sample (Intercept) 0.1952   0.4418  
## Number of obs: 96, groups:  Sample, 32
## 
## Conditional model:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -3.5094     0.1609 -21.817  < 2e-16 ***
## HBB.genotypeHbAC          0.6323     0.1868   3.385 0.000712 ***
## HBB.genotypeHbAS          2.0239     0.2666   7.592 3.16e-14 ***
## scale(varATS_par_µL_D0)   0.1270     0.1015   1.251 0.210813    
## Gender1                  -0.1352     0.1833  -0.738 0.460685    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.2902     0.6456  11.291  < 2e-16 ***
## HBB.genotypeHbAC  -0.4397     0.8535  -0.515    0.606    
## HBB.genotypeHbAS  -4.3241     0.7851  -5.508 3.63e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2227, p-value = 0.47
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.88 0.352 Inf      1.21      2.92    1   3.385  0.0021
##  HbAS / HbAA       7.57 2.020 Inf      4.05     14.14    1   7.592  <.0001
##  HbAS / HbAC       4.02 1.120 Inf      2.09      7.74    1   4.982  <.0001
## 
## Results are averaged over the levels of: Gender 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)

3.11 Likelihood ratio tests to compare complex and reduced models

LRT suggests that the simple model is the better fit and there is no evidence to include the additional factors.

Risk factors / biological relevant factors:

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + 
    scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

model_simple <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")

Interaction effect:

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype * scale(varATS_par_µL_D0) + 
    scale(Age) + Gender + Village + Ethnicity + Symptomatic +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")

Even simpler model containing only the parasitemia interaction, symptomatic or gender factor do not improve model fit:

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + 
    Symptomatic +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")
model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + 
    Gender +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")
model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype * scale(varATS_par_µL_D0) +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")
model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype + scale(varATS_par_µL_D0) + Gender +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")
model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype * scale(varATS_par_µL_D0) + Gender +
    (1 | Sample),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

anova(model, model_simple, test = "LRT")

3.12 Adding additional variables as random effects

Since there appears to be some difference between villages, and there are very few samples for some villages, we could try modelling village as a random factor, but this does not lead to a good fitting model.

model <- glmer(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~ 
    HBB.genotype + (1 | Sample) + (1 | Village), 
  family = binomial, data = df)
## boundary (singular) fit: see help('isSingular')
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + (1 | Sample) + (1 | Village)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1278.0    1290.8    -634.0    1268.0        91 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -12.2206  -0.9947   0.0491   0.7415  15.3930 
## 
## Random effects:
##  Groups  Name        Variance  Std.Dev. 
##  Sample  (Intercept) 3.268e-01 5.717e-01
##  Village (Intercept) 1.290e-09 3.592e-05
## Number of obs: 96, groups:  Sample, 32; Village, 4
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.5841     0.1558 -23.002  < 2e-16 ***
## HBB.genotypeHbAC   0.5962     0.2284   2.610  0.00905 ** 
## HBB.genotypeHbAS   1.9299     0.2816   6.852 7.26e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HBB.HAC
## HBB.gntyHAC -0.682        
## HBB.gntyHAS -0.553  0.377 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.7871, p-value = 0.092
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.82 0.415 Inf      1.06      3.10    1   2.610  0.0246
##  HbAS / HbAA       6.89 1.940 Inf      3.56     13.33    1   6.852  <.0001
##  HbAS / HbAC       3.79 1.090 Inf      1.93      7.45    1   4.631  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale

Adding the extra cofactors improves fit now, but the LRT is still not significantly in favour of the more complex model.

model <- glmmTMB(
  cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
    HBB.genotype +
    scale(varATS_par_µL_D0) + Gender + Symptomatic + 
    # scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
    (1 | Sample) + (1 | Village),
  dispformula = ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)
summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~  
##     HBB.genotype + scale(varATS_par_µL_D0) + Gender + Symptomatic +  
##         (1 | Sample) + (1 | Village)
## Dispersion:                                                                     
##       ~HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     845.3     873.5    -411.6     823.3        85 
## 
## Random effects:
## 
## Conditional model:
##  Groups  Name        Variance  Std.Dev. 
##  Sample  (Intercept) 1.841e-01 4.291e-01
##  Village (Intercept) 8.536e-10 2.922e-05
## Number of obs: 96, groups:  Sample, 32; Village, 4
## 
## Conditional model:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -3.3592     0.2099 -16.000  < 2e-16 ***
## HBB.genotypeHbAC          0.6503     0.1827   3.559 0.000372 ***
## HBB.genotypeHbAS          1.9439     0.2736   7.106  1.2e-12 ***
## scale(varATS_par_µL_D0)   0.1729     0.1088   1.589 0.112103    
## Gender1                  -0.1743     0.1830  -0.952 0.340938    
## Symptomatic1             -0.2047     0.1897  -1.079 0.280530    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.2853     0.6450  11.295  < 2e-16 ***
## HBB.genotypeHbAC  -0.4264     0.8532  -0.500    0.617    
## HBB.genotypeHbAS  -4.3573     0.7864  -5.541 3.01e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2048, p-value = 0.516
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio   SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.92 0.35 Inf      1.25      2.94    1   3.559  0.0011
##  HbAS / HbAA       6.99 1.91 Inf      3.68     13.26    1   7.106  <.0001
##  HbAS / HbAC       3.65 1.06 Inf      1.85      7.20    1   4.452  <.0001
## 
## Results are averaged over the levels of: Gender, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 3 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log odds ratio scale
anova(model, model_simple, test = "LRT")

4 Software used

grateful::cite_packages(output = "table", out.dir = here(), cite.tidyverse = TRUE)